1 Introduction

1.1 Libraries

library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("igraph")
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library("gprofiler2")

2 Methods

2.1 Load data

# Load network
if (!exists("modTOM")) {
 load("modTOM_step.RData")
}

# Load PPI from IID
dataFirst <- read.csv(file = "PPIs_1st.txt", header = TRUE)

Old removed code:

netFirstH2 <- induced_subgraph(wnet, vids = unlist(neighborhood(netFirst, order = 1, nodes = V(netFirst)[“HDAC2”])), impl = “copy_and_delete”) write_graph(netFirstH2, file = “netfirsth2.graphml”, format = “graphml”)

2.2 Create network

########## Create co-expression network ##########
net0 <- igraph::graph.adjacency(modTOM, weighted = TRUE, mode = "undirected")

# Remove self loops
net0 <- simplify(net0)

########## Create PPI network ##########

# Create network from 1st interactions
netFirst <- graph_from_data_frame(dataFirst[3:15])

# Get all lncRNA ids
allLnc <-
  probes1 %>% 
  filter(gene_type != "protein_coding") %>% 
  select(gene_name) %>% 
  unlist() %>% 
  unname()

# Get all EWS-FLI1 1st interactor ids
netFirstNodes <- names(V(netFirst))

# Index of net0 nodes to keep
i <- names(V(net0)) %in% c(netFirstNodes, allLnc)

# Subnetwork from net0 (all co-expression)
# Get a subnetwork with nodes from netFirst and allLnc
# Nodes are in PPI plus all lncRNA
netWgcnaPpiFirst0 <- induced_subgraph(net0, vids = i, impl = "copy_and_delete")

# Summary of netWgcnaPpiFirst weights
print("netWgcnaPpiFirst0 weight summary info")
## [1] "netWgcnaPpiFirst0 weight summary info"
print(summary(E(netWgcnaPpiFirst0)$weight))
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000049 0.0001523 0.0005636 0.0020555 0.0020125 0.3173470
# Compare to the original net0
print("net0 weight summary info")
## [1] "net0 weight summary info"
print(summary(E(net0)$weight))
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000039 0.0002450 0.0010423 0.0039343 0.0039293 0.3180248
# Only get connections above 90%
quantileValue = 0.90
i <- E(netWgcnaPpiFirst0)$weight > unname(quantile(E(netWgcnaPpiFirst0)$weight, quantileValue))

# Create a smaller subnetwork based on cutoff
netWgcnaPpiFirst <- subgraph.edges(netWgcnaPpiFirst0, E(netWgcnaPpiFirst0)[i], delete.vertices = TRUE)

########## Annotate network ##########

tempAnnot <- probes1 %>% filter(gene_name %in% names(V(netWgcnaPpiFirst)))

# Find duplicated itens
tempAnnot$gene_name[duplicated(tempAnnot$gene_name)]
## [1] "GOLGA8M" "HSPA14"
# Check wich gene_id to keep
geneInfo %>% filter(gene_name == c("GOLGA8M", "HSPA14"))
##           gene_id gene_name moduleColor
## 1 ENSG00000261480   GOLGA8M       black
## 2 ENSG00000284024    HSPA14   darkgreen
## 3 ENSG00000187522    HSPA14 darkmagenta
## 4 ENSG00000188626   GOLGA8M darkmagenta
# We should keep ENSG00000187522 and ENSG00000188626
tempAnnot <- subset(tempAnnot, gene_id != "ENSG00000187522" & gene_id != "ENSG00000188626")

# Match nodes order
tempAnnot <- arrange(tempAnnot, match(gene_name, names(V(netWgcnaPpiFirst))))

# Add node attributes to the network
V(netWgcnaPpiFirst)$gene_id <- tempAnnot$gene_id
V(netWgcnaPpiFirst)$gene_type <- tempAnnot$gene_type

# Save network for inspection
#write_graph(netWgcnaPpiFirst, file = "netWgcnaPpiFirst.graphml", format = "graphml")

There are duplicated items because some gene have the same name for lncRNA and their protein coding version. Here are only two. We can check their modules and keep only the right ones. Alternatively, we could work with ensembl ids from the start.

3 Results

3.1 GO

GO of co-expression module “darkmagenta” after we filter PPI items. Used a custom annotation for gProfiler.

# Already uploaded the first time
upload_GMT_file(gmtfile = "cancer_hdac_genesets.zip")

Your custom annotations ID is gp__ZEow_oHws_ILU You can use this ID as an ‘organism’ name in all the related enrichment tests against this custom source. Just use: gost(my_genes, organism = ’gp__ZEow_oHws_ILU’) [1] “gp__ZEow_oHws_ILU”

geneList <- names(V(netWgcnaPpiFirst))

gostres <- gost(query = geneList, 
                organism = "gp__ZEow_oHws_ILU", ordered_query = FALSE, 
                multi_query = FALSE, significant = TRUE, exclude_iea = FALSE, 
                measure_underrepresentation = FALSE, evcodes = FALSE, 
                user_threshold = 0.05, correction_method = "g_SCS", 
                domain_scope = "annotated", custom_bg = NULL, 
                numeric_ns = "", sources = NULL)
## Detected custom GMT source request
p <- gostplot(gostres, capped = TRUE, interactive = TRUE)

write.csv(apply(gostres$result,2,as.character), file = "gostres_netWgcnaPpiFirst_gp__ZEow_oHws_ILU.tsv", row.names = F)

p

3.2 HDAC2 network

Here we create a network with only HDAC2 interactors.

# Create a network for HDAC2
netWgcnaPpiHDAC2 <- induced.subgraph(netWgcnaPpiFirst, vids = unlist( neighborhood(netWgcnaPpiFirst, order=1, nodes = V(netWgcnaPpiFirst)["HDAC2"]) ) )

netWgcnaPpiHDAC2
## IGRAPH a0010ee UNW- 354 21552 -- 
## + attr: name (v/c), gene_id (v/c), gene_type (v/c), weight (e/n)
## + edges from a0010ee (vertex names):
##  [1] CUL3    --FSCN1    U2AF2   --FSCN1    CUL3    --TOP2B    U2AF2   --TOP2B   
##  [5] CUL3    --HSP90AA1 U2AF2   --CAD      CUL3    --CHERP    U2AF2   --CHERP   
##  [9] FSCN1   --CHERP    TOP2B   --CHERP    HSP90AA1--CHERP    CAD     --CHERP   
## [13] U2AF2   --RPL6     CAD     --RPL6     CHERP   --RPL6     CUL3    --SETD1A  
## [17] U2AF2   --SETD1A   FSCN1   --SETD1A   TOP2B   --SETD1A   HSP90AA1--SETD1A  
## [21] CAD     --SETD1A   CHERP   --SETD1A   RPL6    --SETD1A   CUL3    --RTCB    
## [25] U2AF2   --RTCB     FSCN1   --RTCB     TOP2B   --RTCB     HSP90AA1--RTCB    
## [29] CAD     --RTCB     CHERP   --RTCB     RPL6    --RTCB     SETD1A  --RTCB    
## + ... omitted several edges
summary(E(netWgcnaPpiHDAC2)$weight)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.005502 0.007489 0.010545 0.013691 0.016390 0.125328
# Separate lncRNA and protein_coding for inspection
protein_coding <- names(V(netWgcnaPpiHDAC2)[V(netWgcnaPpiHDAC2)$gene_type == "protein_coding"])
lncRNA <- names(V(netWgcnaPpiHDAC2)[V(netWgcnaPpiHDAC2)$gene_type != "protein_coding"])

# Write results
#write_graph(netWgcnaPpiHDAC2, file = "netWgcnaPpiHDAC2.graphml", format = "graphml")
#write.table(protein_coding, file = "for_RNAct_protein_coding.txt", quote = F, row.names = F, col.names = F)
#write.table(lncRNA, file = "for_RNAct_lncRNA.txt", quote = F, row.names = F, col.names = F)

3.3 Betweenness

We filtered the network on weight (previously) and now we apply a betweenness filter due to the large number of conections among nodes. I calculated the percentiles for edge betweenness to choose a threshold to filter the network.

vcount(netWgcnaPpiHDAC2) #354
## [1] 354
ecount(netWgcnaPpiHDAC2) #21552
## [1] 21552
# Calculate edge betweenness
edgeBet <- edge_betweenness(netWgcnaPpiHDAC2, e = E(netWgcnaPpiHDAC2))

plot(quantile(edgeBet, seq(0, 1, 0.01)), main = "Edge Betweenness Percentile", xlab = "Percentile", ylab = "Edge Betweenness")

quantile(edgeBet, seq(0, 1, 0.01))
##   0%   1%   2%   3%   4%   5%   6%   7%   8%   9%  10%  11%  12%  13%  14%  15% 
##    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0 
##  16%  17%  18%  19%  20%  21%  22%  23%  24%  25%  26%  27%  28%  29%  30%  31% 
##    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0 
##  32%  33%  34%  35%  36%  37%  38%  39%  40%  41%  42%  43%  44%  45%  46%  47% 
##    0    0    0    0    0    0    0    1    1    1    1    1    1    1    1    1 
##  48%  49%  50%  51%  52%  53%  54%  55%  56%  57%  58%  59%  60%  61%  62%  63% 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1    2 
##  64%  65%  66%  67%  68%  69%  70%  71%  72%  73%  74%  75%  76%  77%  78%  79% 
##    2    2    2    2    2    2    3    3    3    4    4    4    5    5    6    7 
##  80%  81%  82%  83%  84%  85%  86%  87%  88%  89%  90%  91%  92%  93%  94%  95% 
##    7    8    9   10   11   12   13   15   16   18   19   21   22   24   26   28 
##  96%  97%  98%  99% 100% 
##   31   33   37   44  221
# Separate the 0.95 percentile
i <- edgeBet > unname(quantile(edgeBet, 0.95))

# Create a subgraph
netWgcnaPpiHDAC2Clean <- subgraph.edges(netWgcnaPpiHDAC2, E(netWgcnaPpiHDAC2)[i], delete.vertices = TRUE)

# Add edge betweenness
E(netWgcnaPpiHDAC2Clean)$eb <- edge_betweenness(netWgcnaPpiHDAC2Clean, e = E(netWgcnaPpiHDAC2Clean))

# Write results to inspect on Cytoscape
#write_graph(netWgcnaPpiHDAC2Clean, file = "netWgcnaPpiHDAC2Clean.graphml", format = "graphml")

Now we have a network - netWgcnaPpiHDAC2Clean with 353 nodes and 1049 edges, which is better to analyze.

3.4 Extract neighbors

We can extract the neighbors of HDAC2 and their gene_type.

neighHdac2 <- names(unlist(neighborhood(netWgcnaPpiHDAC2Clean, order=1, nodes = V(netWgcnaPpiHDAC2Clean)["HDAC2"])))
resHdac2 <- tempAnnot %>% filter(gene_name %in% neighHdac2)

# Add gene location on the genome
resHdac2 <- resHdac2 %>% left_join(annot0, by = c("gene_id", "gene_name", "gene_type")) %>% select(-X)

# Show results
resHdac2
##            gene_id  gene_name            gene_type seqnames     start       end
## 1  ENSG00000080824   HSP90AA1       protein_coding    chr14 102080738 102139699
## 2  ENSG00000130175     PRKCSH       protein_coding    chr19  11435288  11450968
## 3  ENSG00000132507      EIF5A       protein_coding    chr17   7306999   7312463
## 4  ENSG00000157020      SEC13       protein_coding     chr3  10293131  10321112
## 5  ENSG00000182944      EWSR1       protein_coding    chr22  29268009  29300525
## 6  ENSG00000186019 AC021092.1            antisense    chr19  44105463  44113145
## 7  ENSG00000261480    GOLGA8M              lincRNA    chr15  28719377  28738431
## 8  ENSG00000196591      HDAC2       protein_coding     chr6 113933028 114011308
## 9  ENSG00000197728      RPS26       protein_coding    chr12  56041351  56044697
## 10 ENSG00000223745 CCDC18-AS1 processed_transcript     chr1  93262186  93346025
## 11 ENSG00000225986 UBXN10-AS1            antisense     chr1  20184242  20186486
## 12 ENSG00000228323 AC008440.1            antisense    chr19  53854581  53869107
## 13 ENSG00000228509 AC006460.1            antisense     chr2 190676944 190708716
## 14 ENSG00000228543 AC003684.1              lincRNA     chrX   9249920   9275206
## 15 ENSG00000228784  LINC00954              lincRNA     chr2  19868860  19885047
## 16 ENSG00000232485 AC098820.1 processed_transcript     chr2 216479030 216498761
## 17 ENSG00000233393 AP000688.2              lincRNA    chr21  36104881  36109690
## 18 ENSG00000233521  LINC01638              lincRNA    chr22  27221349  27224727
## 19 ENSG00000237429 BX293535.1            antisense     chr1  27525805  27530561
## 20 ENSG00000238197 PAXBP1-AS1            antisense    chr21  32728115  32743122
## 21 ENSG00000241728 AP001062.3    sense_overlapping    chr21  44328944  44330221
## 22 ENSG00000243176 AC092944.1 processed_transcript     chr3 157175223 157381265
## 23 ENSG00000245522 AC026250.1              lincRNA    chr11   9754770   9759533
## 24 ENSG00000245888  NSMCE1-DT            antisense    chr16  27268205  27290492
## 25 ENSG00000246863 AC012377.1              lincRNA    chr15  39921042  39925880
## 26 ENSG00000247679 AC139795.2            antisense     chr5 177611253 177619754
## 27 ENSG00000249180 AC020900.1            antisense     chr5  96741079  96742698
## 28 ENSG00000249695 AC026369.1            antisense    chr12    137411    149169
## 29 ENSG00000249740   OSMR-AS1              lincRNA     chr5  38710367  38845829
## 30 ENSG00000250733    C8orf17                  TEC     chr8 139931172 139933946
## 31 ENSG00000251359   WWC2-AS2              lincRNA     chr4 183097017 183099199
## 32 ENSG00000253307 AC011676.1            antisense     chr8 141252286 141253292
## 33 ENSG00000255449 AP002812.5            antisense    chr11  77866412  77870091
## 34 ENSG00000256139 AC007637.1    sense_overlapping    chr12 109111218 109125594
## 35 ENSG00000256262  USP30-AS1            antisense    chr12 109052350 109053952
## 36 ENSG00000257048  LINC02417              lincRNA    chr12   3367833   3371346
## 37 ENSG00000258458 AL160314.2 processed_transcript    chr14  22701476  22766562
## 38 ENSG00000258819  LINC02289              lincRNA    chr14  77069180  77076344
## 39 ENSG00000260267 AC026471.1            antisense    chr16  31456711  31459736
## 40 ENSG00000260711 AL121839.2       sense_intronic    chr14  91752856  91759798
## 41 ENSG00000261118 AC092123.1            antisense    chr16  89492017  89504460
## 42 ENSG00000261170 AC009053.3            antisense    chr16  74422120  74435254
## 43 ENSG00000262728 AC123768.3            antisense    chr15  32586105  32615158
## 44 ENSG00000263164 AC087500.2              lincRNA    chr17   5240508   5241543
## 45 ENSG00000265282 AC005828.4              lincRNA    chr17  63430468  63432211
## 46 ENSG00000267072  NAGPA-AS1              lincRNA    chr16   5010909   5043999
## 47 ENSG00000267250 AC011591.1              lincRNA    chr17  68793549  68797822
## 48 ENSG00000267655 AC125437.1       sense_intronic    chr18  79117207  79117920
## 49 ENSG00000269053 AC010319.3            antisense    chr19  17419305  17419774
## 50 ENSG00000269068 AC009955.4            antisense     chr2 219559083 219559626
## 51 ENSG00000269707 AC018730.1    sense_overlapping     chr2 104853285 104926052
## 52 ENSG00000270165 AC010530.1       sense_intronic    chr16  67738588  67739922
## 53 ENSG00000272273   IER3-AS1            antisense     chr6  30742929  30743592
## 54 ENSG00000273674 AC021752.1              lincRNA    chr15  50839875  50908599
## 55 ENSG00000273828 AL133227.1            antisense    chr20  46364551  46397994
## 56 ENSG00000275481 AC025031.4              lincRNA    chr12  46388856  46392126
## 57 ENSG00000276412 AL591926.2              lincRNA     chr9  41651658  41654594
## 58 ENSG00000277245 AC084782.3            antisense    chr15  56447120  56447697
## 59 ENSG00000278864 AC055811.4                  TEC    chr17  17181504  17183257
## 60 ENSG00000278935 AC087386.2                  TEC    chr15  20141794  20146430
## 61 ENSG00000279212 AL390961.3                  TEC    chr10  26695091  26697625
## 62 ENSG00000279232 AC008522.1            antisense     chr5  98792861  98795766
## 63 ENSG00000279970 AC023024.2                  TEC    chr15 101299656 101301648
## 64 ENSG00000280078 AC016526.3                  TEC    chr14  76279173  76283103
## 65 ENSG00000285095 AC025887.2            antisense    chr18  32470288  32745567
## 66 ENSG00000285407 AL033530.1              lincRNA     chr1  68679202  68943452
## 67 ENSG00000286039 AC093849.2              lincRNA     chr4 173509633 173511764
##     width strand
## 1   58962      -
## 2   15681      +
## 3    5465      +
## 4   27982      -
## 5   32517      +
## 6    7683      -
## 7   19055      -
## 8   78281      -
## 9    3347      +
## 10  83840      -
## 11   2245      -
## 12  14527      -
## 13  31773      -
## 14  25287      +
## 15  16188      +
## 16  19732      -
## 17   4810      +
## 18   3379      -
## 19   4757      +
## 20  15008      +
## 21   1278      -
## 22 206043      +
## 23   4764      -
## 24  22288      +
## 25   4839      +
## 26   8502      +
## 27   1620      -
## 28  11759      -
## 29 135463      -
## 30   2775      +
## 31   2183      -
## 32   1007      -
## 33   3680      -
## 34  14377      +
## 35   1603      -
## 36   3514      +
## 37  65087      -
## 38   7165      -
## 39   3026      -
## 40   6943      -
## 41  12444      -
## 42  13135      +
## 43  29054      -
## 44   1036      -
## 45   1744      -
## 46  33091      +
## 47   4274      -
## 48    714      +
## 49    470      -
## 50    544      +
## 51  72768      +
## 52   1335      -
## 53    664      +
## 54  68725      -
## 55  33444      +
## 56   3271      +
## 57   2937      -
## 58    578      +
## 59   1754      +
## 60   4637      +
## 61   2535      -
## 62   2906      -
## 63   1993      -
## 64   3931      +
## 65 275280      +
## 66 264251      +
## 67   2132      +
# Arrange by edge betweenness and weight
edgebetResHdac2 <- data.frame(as_edgelist(netWgcnaPpiHDAC2Clean)) %>% mutate(eb = E(netWgcnaPpiHDAC2Clean)$eb) %>% filter(X1 == "HDAC2" | X2 == "HDAC2") %>% arrange(desc(eb))
edgeweiResHdac2 <- data.frame(as_edgelist(netWgcnaPpiHDAC2Clean)) %>% mutate(wei = E(netWgcnaPpiHDAC2Clean)$weight) %>% filter(X1 == "HDAC2" | X2 == "HDAC2") %>% arrange(desc(wei))

#write.csv(resHdac2, file = "resHdac2.csv", row.names = F)
#write.csv(edgebetResHdac2, file = "edgebetResHdac2.csv", row.names = F)
#write.csv(edgeweiResHdac2, file = "edgeweiResHdac2.csv", row.names = F)

head(edgebetResHdac2)
##         X1         X2  eb
## 1    HDAC2 AP001062.3 994
## 2    HDAC2 AC010319.3 741
## 3    HDAC2 AL390961.3 700
## 4    HDAC2      RPS26 674
## 5 HSP90AA1      HDAC2 610
## 6    EIF5A      HDAC2 556
head(edgeweiResHdac2)
##           X1         X2         wei
## 1      HDAC2 AC020900.1 0.009754748
## 2    GOLGA8M      HDAC2 0.008836550
## 3 AC021092.1      HDAC2 0.008804229
## 4      HDAC2  USP30-AS1 0.008636667
## 5      HDAC2   WWC2-AS2 0.008583435
## 6      HDAC2 AC018730.1 0.007823994

4 Conclusion

We created a cleaned HDAC2 network that we can analyze with Cytoscape.

5 SessionInfo

sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] gprofiler2_0.2.1 igraph_1.2.6     dplyr_1.0.7     
## 
## loaded via a namespace (and not attached):
##  [1] highr_0.9         pillar_1.6.3      compiler_4.1.1    jquerylib_0.1.4  
##  [5] bitops_1.0-7      tools_4.1.1       digest_0.6.28     viridisLite_0.4.0
##  [9] jsonlite_1.7.2    evaluate_0.14     lifecycle_1.0.1   tibble_3.1.5     
## [13] gtable_0.3.0      pkgconfig_2.0.3   rlang_0.4.11      DBI_1.1.1        
## [17] crosstalk_1.1.1   yaml_2.2.1        xfun_0.26         fastmap_1.1.0    
## [21] httr_1.4.2        stringr_1.4.0     knitr_1.36        htmlwidgets_1.5.4
## [25] generics_0.1.0    vctrs_0.3.8       grid_4.1.1        tidyselect_1.1.1 
## [29] data.table_1.14.2 glue_1.4.2        R6_2.5.1          fansi_0.5.0      
## [33] plotly_4.10.0     rmarkdown_2.11    tidyr_1.1.4       purrr_0.3.4      
## [37] ggplot2_3.3.5     magrittr_2.0.1    scales_1.1.1      ellipsis_0.3.2   
## [41] htmltools_0.5.2   assertthat_0.2.1  colorspace_2.0-2  utf8_1.2.2       
## [45] stringi_1.7.5     RCurl_1.98-1.5    lazyeval_0.2.2    munsell_0.5.0    
## [49] crayon_1.4.1